# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Importing necessary libraries
import pandas as pd
import numpy as np

# Read the daily bike-sharing records from the CSV in the working directory.
csv_path = "day.csv"
Bikesharing = pd.read_csv(csv_path)

# Preview the first rows of the raw frame.
Bikesharing.head()

# (rows, columns) of the raw frame.
Bikesharing.shape

(730, 16)

# Print column dtypes, non-null counts and memory usage of the raw frame.
Bikesharing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   instant     730 non-null    int64  
 1   dteday      730 non-null    object 
 2   season      730 non-null    int64  
 3   yr          730 non-null    int64  
 4   mnth        730 non-null    int64  
 5   holiday     730 non-null    int64  
 6   weekday     730 non-null    int64  
 7   workingday  730 non-null    int64  
 8   weathersit  730 non-null    int64  
 9   temp        730 non-null    float64
 10  atemp       730 non-null    float64
 11  hum         730 non-null    float64
 12  windspeed   730 non-null    float64
 13  casual      730 non-null    int64  
 14  registered  730 non-null    int64  
 15  cnt         730 non-null    int64  
dtypes: float64(4), int64(11), object(1)
memory usage: 91.4+ KB

# Summary statistics of the raw frame.
Bikesharing.describe()

# Work on a copy so the raw frame stays untouched.
data = Bikesharing.copy()

# Give the abbreviated columns readable names.
readable_names = {
    "yr": "year",
    "mnth": "month",
    "temp": "Temperature",
    "hum": "humidity",
}
data = data.rename(columns=readable_names)

data.head()

# Remove the columns that are not used in the rest of the analysis.
drop_columns = ['instant', 'dteday', 'casual', 'registered']
data = data.drop(columns=drop_columns)

data.head()

# Pairwise correlation of the continuous weather columns.
weather_columns = ['Temperature', 'atemp', 'humidity', 'windspeed']
data[weather_columns].corr()

# 'atemp' is removed after inspecting the correlations above.
data = data.drop(columns='atemp')

# Distinct season codes present in the data.
data['season'].unique()

array([1, 2, 3, 4], dtype=int64)

# Replace the season codes 1-4 with their names.
season_names = dict(enumerate(['spring', 'summer', 'fall', 'winter'], start=1))
data['season'] = data['season'].replace(season_names)

# Replace the month codes 1-12 with lower-case month names.
month_names = dict(
    enumerate(
        [
            'january', 'february', 'march', 'april', 'may', 'june',
            'july', 'august', 'september', 'october', 'november', 'december',
        ],
        start=1,
    )
)
data['month'] = data['month'].replace(month_names)

# Distinct weekday codes present in the data.
data['weekday'].unique()

array([1, 2, 3, 4, 5, 6, 0], dtype=int64)

# Replace the weekday codes 0-6 with day names (0 is mapped to Sunday).
weekday_names = dict(
    enumerate(
        ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    )
)
data['weekday'] = data['weekday'].replace(weekday_names)

# Distinct weather-situation codes present in the data.
data['weathersit'].unique()

array([2, 1, 3], dtype=int64)

# Replace the weather-situation codes with descriptive labels.
weather_labels = {
    1: 'clear',
    2: 'cloudy',
    3: 'light_rain',
    4: 'heavy_rain',
}
data['weathersit'] = data['weathersit'].replace(weather_labels)

data.head()

import matplotlib.pyplot as plt
import seaborn as sns

# Visualizing the numerical variables: scatter each one against cnt.
sns.pairplot(
    data=data,
    x_vars=['Temperature', 'humidity', 'windspeed'],
    y_vars='cnt',
    kind='scatter',
    height=5,
    aspect=1,
);

# Remove rows with Temperature strictly between 15 and 20 and cnt below 100.
outlier_mask = (data['Temperature'] > 15) & (data['Temperature'] < 20) & (data['cnt'] < 100)
data = data.loc[~outlier_mask]

# Remove rows with Temperature strictly between 25 and 30 and cnt below 2000.
outlier_mask = (data['Temperature'] > 25) & (data['Temperature'] < 30) & (data['cnt'] < 2000)
data = data.loc[~outlier_mask]

# Remove rows with humidity below 20.
outlier_mask = data['humidity'] < 20
data = data.loc[~outlier_mask]

# Remove rows with windspeed above 30.
outlier_mask = data['windspeed'] > 30
data = data.loc[~outlier_mask]

data.shape

(725, 11)

# Sub-frame holding the columns treated as categorical.
categorical_names = ['holiday', 'workingday', 'season', 'weekday', 'weathersit', 'month']
cat_col = data[categorical_names]

def plotting(data, column):
    """Show a box plot of ``cnt`` grouped by the values of ``column``.

    Parameters
    ----------
    data
        Frame passed straight to ``sns.boxplot``; it must contain ``column``
        and a ``cnt`` column.
    column
        Label of the column placed on the x-axis and quoted in the title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))

    # NOTE(review): only the first cell of a 2x2 grid is used, so the plot
    # fills roughly a quarter of the 8x6 figure -- confirm this is intended.
    ax1 = plt.subplot(2, 2, 1)
    sns.boxplot(data=data, palette='Set1', x=column, y='cnt', ax=ax1)
    # f-string so that non-string column labels do not break the title.
    plt.title(f'Plotting data for the column: {column}')
    plt.xticks(rotation=90)

    # Keep the rotated tick labels and the title inside the figure area.
    plt.tight_layout()

    plt.show()

# One box plot per column of cat_col (iterating its column labels).
for column_name in cat_col.columns:
    plotting(data, column_name)

# Dropping outliers in categorical variables: spring rows with cnt above 7000.
spring_outliers = (data['season'] == 'spring') & (data['cnt'] > 7000)
data = data.loc[~spring_outliers]

data.shape

(724, 11)

data.head()

# Pairwise scatter plots of the continuous columns and the target.
numeric_columns = ["Temperature", "humidity", "windspeed", "cnt"]
sns.pairplot(data, vars=numeric_columns)

<seaborn.axisgrid.PairGrid at 0x200b3821ac0>

# Annotated heat map of the correlations between the numeric columns.
correlations = data.corr(numeric_only=True)
plt.figure(figsize=(6, 5))
sns.heatmap(correlations, annot=True, cmap='OrRd')
plt.show()

# Creating dummy variables: integer indicator columns for each categorical
# column, with drop_first=True omitting the first level of each.
encode_options = {"dtype": int, "drop_first": True}
season_dummies = pd.get_dummies(data['season'], **encode_options)
month_dummies = pd.get_dummies(data['month'], **encode_options)
weekday_dummies = pd.get_dummies(data['weekday'], **encode_options)
weathersit_dummies = pd.get_dummies(data['weathersit'], **encode_options)

# Append the indicator columns to the original frame.
data = pd.concat(
    [data, season_dummies, month_dummies, weekday_dummies, weathersit_dummies],
    axis="columns",
)
data.head()

# The original categorical columns are replaced by their indicators.
data = data.drop(['season', 'month', 'weekday', 'weathersit'], axis=1)

data.columns

Index(['year', 'holiday', 'workingday', 'Temperature', 'humidity', 'windspeed',
       'cnt', 'spring', 'summer', 'winter', 'august', 'december', 'february',
       'january', 'july', 'june', 'march', 'may', 'november', 'october',
       'september', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday',
       'Wednesday', 'cloudy', 'light_rain'],
      dtype='object')

# (rows, columns) after dummy encoding.
data.shape

(724, 29)

from sklearn.model_selection import train_test_split

# 70 % of the rows go to training; random_state fixes the shuffle.
data_train, data_test = train_test_split(
    data,
    train_size=0.7,
    random_state=100,
)

for subset in (data_train, data_test):
    print(subset.shape)

(506, 29)
(218, 29)

# Show every column when a frame is displayed.
pd.options.display.max_columns = None

data.head()

data.shape

(724, 29)

from sklearn.preprocessing import MinMaxScaler

# Min-max scaler fitted on the training rows only.
scaler = MinMaxScaler()

# Continuous columns (including the target 'cnt') that get rescaled.
num_vars = ['Temperature', 'humidity', 'windspeed', 'cnt']

# Take an explicit copy before assigning columns: data_train comes from
# train_test_split, and writing into it directly is what pandas reports as a
# chained-assignment (SettingWithCopy) warning.
data_train = data_train.copy()
data_train[num_vars] = scaler.fit_transform(data_train[num_vars])
data_train.head()

data_train.describe()

data.shape

(724, 29)

# Annotated heat map of the training-set correlations, to spot highly
# correlated variables.
train_correlations = data_train.corr()
plt.figure(figsize=(30, 15))
sns.heatmap(train_correlations, annot=True, cmap="YlGnBu")
plt.show()

data.shape

(724, 29)

# Take the target out of the training frame; what remains are the predictors.
# X_train is the same object as data_train, not a copy.
y_train = data_train['cnt']
del data_train['cnt']
X_train = data_train

X_train.head()

y_train.head()

422    0.469757
728    0.164795
614    0.853918
113    0.453942
579    0.814198
Name: cnt, dtype: float64

import statsmodels.api as sm

# Baseline OLS on every predictor; add_constant supplies the intercept column.
X_train_lm = sm.add_constant(X_train)
m1 = sm.OLS(endog=y_train, exog=X_train_lm).fit()

# Fitted coefficients.
m1.params

const          0.377624
year           0.242509
holiday       -0.164266
workingday    -0.104056
Temperature    0.467470
humidity      -0.147343
windspeed     -0.119137
spring        -0.090295
summer         0.017066
winter         0.106043
august        -0.002092
december      -0.068920
february      -0.035135
january       -0.061048
july          -0.037722
june          -0.004828
march          0.004660
may            0.035879
november      -0.075464
october        0.007204
september      0.067729
Monday        -0.010415
Saturday      -0.094819
Sunday        -0.094949
Thursday       0.008495
Tuesday       -0.028485
Wednesday     -0.002381
cloudy        -0.048702
light_rain    -0.182129
dtype: float64

# Full regression report for the baseline model (coefficients, p-values, fit statistics).
print(m1.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.859
Model:                            OLS   Adj. R-squared:                  0.851
Method:                 Least Squares   F-statistic:                     103.9
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          2.65e-183
Time:                        11:11:02   Log-Likelihood:                 504.01
No. Observations:                 506   AIC:                            -950.0
Df Residuals:                     477   BIC:                            -827.4
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.3776      0.086      4.388      0.000       0.209       0.547
year            0.2425      0.008     28.564      0.000       0.226       0.259
holiday        -0.1643      0.066     -2.470      0.014      -0.295      -0.034
workingday     -0.1041      0.072     -1.450      0.148      -0.245       0.037
Temperature     0.4675      0.048      9.684      0.000       0.373       0.562
humidity       -0.1473      0.030     -4.888      0.000      -0.207      -0.088
windspeed      -0.1191      0.024     -4.977      0.000      -0.166      -0.072
spring         -0.0903      0.031     -2.899      0.004      -0.152      -0.029
summer          0.0171      0.028      0.610      0.542      -0.038       0.072
winter          0.1060      0.028      3.820      0.000       0.052       0.161
august         -0.0021      0.036     -0.059      0.953      -0.072       0.068
december       -0.0689      0.035     -1.993      0.047      -0.137      -0.001
february       -0.0351      0.035     -1.012      0.312      -0.103       0.033
january        -0.0610      0.035     -1.741      0.082      -0.130       0.008
july           -0.0377      0.036     -1.037      0.300      -0.109       0.034
june           -0.0048      0.026     -0.184      0.854      -0.056       0.047
march           0.0047      0.026      0.179      0.858      -0.047       0.056
may             0.0359      0.022      1.664      0.097      -0.007       0.078
november       -0.0755      0.037     -2.021      0.044      -0.149      -0.002
october         0.0072      0.037      0.196      0.845      -0.065       0.079
september       0.0677      0.033      2.027      0.043       0.002       0.133
Monday         -0.0104      0.016     -0.648      0.517      -0.042       0.021
Saturday       -0.0948      0.072     -1.309      0.191      -0.237       0.048
Sunday         -0.0949      0.072     -1.311      0.191      -0.237       0.047
Thursday        0.0085      0.016      0.535      0.593      -0.023       0.040
Tuesday        -0.0285      0.016     -1.832      0.068      -0.059       0.002
Wednesday      -0.0024      0.016     -0.150      0.881      -0.034       0.029
cloudy         -0.0487      0.012     -4.182      0.000      -0.072      -0.026
light_rain     -0.1821      0.035     -5.252      0.000      -0.250      -0.114
==============================================================================
Omnibus:                      109.188   Durbin-Watson:                   1.939
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              296.697
Skew:                          -1.050   Prob(JB):                     3.74e-65
Kurtosis:                       6.108   Cond. No.                         68.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

from statsmodels.stats.outliers_influence import variance_inflation_factor

# Variance inflation factor of every training predictor, rounded to two
# decimals and listed from highest to lowest.
X = X_train
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

from sklearn.feature_selection import RFE

from sklearn.linear_model import LinearRegression

# Linear model used as the estimator for recursive feature elimination.
lm = LinearRegression()
lm.fit(X_train, y_train)

# Keep 20 features, eliminating one per iteration.
rfe = RFE(estimator=lm, n_features_to_select=20, step=1).fit(X_train, y_train)

# Per-feature selection flag and elimination rank, best rank first.
rfe_ranking = pd.DataFrame(
    {
        'features': X_train.columns,
        'Support': rfe.support_,
        'Rank': rfe.ranking_,
    }
)
rfe_ranking.sort_values(by='Rank', ascending=True)

# Labels of the features RFE kept.
col = X_train.columns[rfe.support_]
col

Index(['year', 'holiday', 'workingday', 'Temperature', 'humidity', 'windspeed',
       'spring', 'winter', 'december', 'february', 'january', 'july', 'may',
       'november', 'september', 'Saturday', 'Sunday', 'Tuesday', 'cloudy',
       'light_rain'],
      dtype='object')

# Labels of the features RFE did not select.
X_train.columns[~rfe.support_]

Index(['summer', 'august', 'june', 'march', 'october', 'Monday', 'Thursday',
       'Wednesday'],
      dtype='object')

# Restrict the training predictors to the RFE-selected columns.
X_train_rfe = X_train[col]

X_train_rfe.head()

# statsmodels does not add an intercept on its own, so build a design matrix
# that carries the constant column.
X_train_lm = sm.add_constant(X_train_rfe)

# Fit on the design matrix WITH the constant. Fitting on X_train_rfe instead
# silently drops the intercept, which makes statsmodels report an uncentered
# R-squared that is not comparable with the other models in this analysis.
lm = sm.OLS(y_train, X_train_lm).fit()

print(lm.summary())

                                 OLS Regression Results                                
=======================================================================================
Dep. Variable:                    cnt   R-squared (uncentered):                   0.972
Model:                            OLS   Adj. R-squared (uncentered):              0.971
Method:                 Least Squares   F-statistic:                              836.4
Date:                Tue, 11 Mar 2025   Prob (F-statistic):                        0.00
Time:                        11:11:07   Log-Likelihood:                          489.37
No. Observations:                 506   AIC:                                     -938.7
Df Residuals:                     486   BIC:                                     -854.2
Df Model:                          20                                                  
Covariance Type:            nonrobust                                                  
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
year            0.2472      0.009     28.910      0.000       0.230       0.264
holiday         0.1302      0.034      3.853      0.000       0.064       0.197
workingday      0.2350      0.027      8.863      0.000       0.183       0.287
Temperature     0.5014      0.034     14.833      0.000       0.435       0.568
humidity       -0.1380      0.030     -4.606      0.000      -0.197      -0.079
windspeed      -0.0944      0.024     -3.977      0.000      -0.141      -0.048
spring         -0.0814      0.019     -4.295      0.000      -0.119      -0.044
winter          0.1145      0.015      7.733      0.000       0.085       0.144
december       -0.0694      0.018     -3.811      0.000      -0.105      -0.034
february       -0.0317      0.023     -1.391      0.165      -0.076       0.013
january        -0.0560      0.022     -2.536      0.012      -0.099      -0.013
july           -0.0437      0.018     -2.427      0.016      -0.079      -0.008
may             0.0517      0.016      3.168      0.002       0.020       0.084
november       -0.0812      0.019     -4.180      0.000      -0.119      -0.043
september       0.0619      0.017      3.709      0.000       0.029       0.095
Saturday        0.2423      0.028      8.751      0.000       0.188       0.297
Sunday          0.2416      0.028      8.764      0.000       0.187       0.296
Tuesday        -0.0254      0.012     -2.114      0.035      -0.049      -0.002
cloudy         -0.0457      0.012     -3.892      0.000      -0.069      -0.023
light_rain     -0.1798      0.035     -5.160      0.000      -0.248      -0.111
==============================================================================
Omnibus:                      113.859   Durbin-Watson:                   1.973
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              325.332
Skew:                          -1.076   Prob(JB):                     2.26e-71
Kurtosis:                       6.287   Cond. No.                         20.0
==============================================================================

Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factor of each column in the current RFE feature set,
# rounded to two decimals and listed from highest to lowest.
X = X_train_rfe
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Remove 'workingday' from the feature set and refit OLS with an intercept.
X_train_rfe = X_train_rfe.drop(columns=['workingday'])
X_train_lm = sm.add_constant(X_train_rfe)
m3 = sm.OLS(endog=y_train, exog=X_train_lm).fit()
print(m3.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.858
Model:                            OLS   Adj. R-squared:                  0.852
Method:                 Least Squares   F-statistic:                     154.2
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          7.94e-192
Time:                        11:11:09   Log-Likelihood:                 501.56
No. Observations:                 506   AIC:                            -963.1
Df Residuals:                     486   BIC:                            -878.6
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2914      0.028     10.316      0.000       0.236       0.347
year            0.2439      0.008     29.131      0.000       0.227       0.260
holiday        -0.0726      0.024     -2.980      0.003      -0.121      -0.025
Temperature     0.4482      0.035     12.907      0.000       0.380       0.516
humidity       -0.1489      0.029     -5.087      0.000      -0.206      -0.091
windspeed      -0.1156      0.024     -4.917      0.000      -0.162      -0.069
spring         -0.0978      0.019     -5.202      0.000      -0.135      -0.061
winter          0.1031      0.015      7.045      0.000       0.074       0.132
december       -0.0784      0.018     -4.388      0.000      -0.113      -0.043
february       -0.0418      0.022     -1.872      0.062      -0.086       0.002
january        -0.0696      0.022     -3.201      0.001      -0.112      -0.027
july           -0.0410      0.018     -2.333      0.020      -0.076      -0.006
may             0.0465      0.016      2.913      0.004       0.015       0.078
november       -0.0865      0.019     -4.557      0.000      -0.124      -0.049
september       0.0618      0.016      3.796      0.000       0.030       0.094
Saturday        0.0087      0.012      0.716      0.474      -0.015       0.033
Sunday          0.0080      0.012      0.671      0.502      -0.015       0.032
Tuesday        -0.0271      0.012     -2.315      0.021      -0.050      -0.004
cloudy         -0.0472      0.011     -4.121      0.000      -0.070      -0.025
light_rain     -0.1791      0.034     -5.268      0.000      -0.246      -0.112
==============================================================================
Omnibus:                      109.357   Durbin-Watson:                   1.938
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              302.518
Skew:                          -1.045   Prob(JB):                     2.04e-66
Kurtosis:                       6.159   Cond. No.                         17.4
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factor of each column in the current RFE feature set,
# rounded to two decimals and listed from highest to lowest.
X = X_train_rfe
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Remove 'humidity' from the feature set and refit OLS with an intercept.
X_train_rfe = X_train_rfe.drop(columns=['humidity'])
X_train_lm = sm.add_constant(X_train_rfe)
m4 = sm.OLS(endog=y_train, exog=X_train_lm).fit()
print(m4.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.850
Model:                            OLS   Adj. R-squared:                  0.845
Method:                 Least Squares   F-statistic:                     153.5
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          1.75e-187
Time:                        11:11:11   Log-Likelihood:                 488.44
No. Observations:                 506   AIC:                            -938.9
Df Residuals:                     487   BIC:                            -858.6
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2487      0.028      8.994      0.000       0.194       0.303
year            0.2508      0.008     29.622      0.000       0.234       0.267
holiday        -0.0707      0.025     -2.827      0.005      -0.120      -0.022
Temperature     0.4016      0.034     11.695      0.000       0.334       0.469
windspeed      -0.0841      0.023     -3.618      0.000      -0.130      -0.038
spring         -0.1065      0.019     -5.546      0.000      -0.144      -0.069
winter          0.0935      0.015      6.281      0.000       0.064       0.123
december       -0.0885      0.018     -4.865      0.000      -0.124      -0.053
february       -0.0425      0.023     -1.854      0.064      -0.087       0.003
january        -0.0720      0.022     -3.228      0.001      -0.116      -0.028
july           -0.0305      0.018     -1.704      0.089      -0.066       0.005
may             0.0349      0.016      2.154      0.032       0.003       0.067
november       -0.0918      0.019     -4.725      0.000      -0.130      -0.054
september       0.0531      0.017      3.198      0.001       0.020       0.086
Saturday        0.0120      0.012      0.960      0.337      -0.013       0.037
Sunday          0.0110      0.012      0.896      0.371      -0.013       0.035
Tuesday        -0.0306      0.012     -2.552      0.011      -0.054      -0.007
cloudy         -0.0851      0.009     -9.528      0.000      -0.103      -0.068
light_rain     -0.2573      0.031     -8.276      0.000      -0.318      -0.196
==============================================================================
Omnibus:                      105.198   Durbin-Watson:                   1.892
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              292.588
Skew:                          -1.004   Prob(JB):                     2.92e-64
Kurtosis:                       6.138   Cond. No.                         16.3
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factor of each column in the current RFE feature set,
# rounded to two decimals and listed from highest to lowest.
X = X_train_rfe
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Remove 'windspeed' from the feature set and refit OLS with an intercept.
X_train_rfe = X_train_rfe.drop(columns=['windspeed'])
X_train_lm = sm.add_constant(X_train_rfe)
m5 = sm.OLS(endog=y_train, exog=X_train_lm).fit()
print(m5.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.846
Model:                            OLS   Adj. R-squared:                  0.841
Method:                 Least Squares   F-statistic:                     157.9
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          8.41e-186
Time:                        11:11:13   Log-Likelihood:                 481.73
No. Observations:                 506   AIC:                            -927.5
Df Residuals:                     488   BIC:                            -851.4
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2021      0.025      8.159      0.000       0.153       0.251
year            0.2502      0.009     29.191      0.000       0.233       0.267
holiday        -0.0712      0.025     -2.814      0.005      -0.121      -0.021
Temperature     0.4156      0.035     12.030      0.000       0.348       0.483
spring         -0.1097      0.019     -5.652      0.000      -0.148      -0.072
winter          0.1021      0.015      6.864      0.000       0.073       0.131
december       -0.0850      0.018     -4.618      0.000      -0.121      -0.049
february       -0.0367      0.023     -1.588      0.113      -0.082       0.009
january        -0.0647      0.022     -2.877      0.004      -0.109      -0.021
july           -0.0262      0.018     -1.448      0.148      -0.062       0.009
may             0.0368      0.016      2.247      0.025       0.005       0.069
november       -0.0933      0.020     -4.748      0.000      -0.132      -0.055
september       0.0592      0.017      3.543      0.000       0.026       0.092
Saturday        0.0122      0.013      0.968      0.333      -0.013       0.037
Sunday          0.0110      0.012      0.888      0.375      -0.013       0.035
Tuesday        -0.0321      0.012     -2.641      0.009      -0.056      -0.008
cloudy         -0.0835      0.009     -9.246      0.000      -0.101      -0.066
light_rain     -0.2701      0.031     -8.639      0.000      -0.331      -0.209
==============================================================================
Omnibus:                      101.379   Durbin-Watson:                   1.943
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              282.326
Skew:                          -0.968   Prob(JB):                     4.94e-62
Kurtosis:                       6.106   Cond. No.                         15.1
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factor of each column in the current RFE feature set,
# rounded to two decimals and listed from highest to lowest.
X = X_train_rfe
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Remove 'Sunday' from the feature set and refit OLS with an intercept.
X_train_rfe = X_train_rfe.drop(columns=['Sunday'])
X_train_lm = sm.add_constant(X_train_rfe)
m6 = sm.OLS(endog=y_train, exog=X_train_lm).fit()
print(m6.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.846
Model:                            OLS   Adj. R-squared:                  0.841
Method:                 Least Squares   F-statistic:                     167.8
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          9.45e-187
Time:                        11:11:14   Log-Likelihood:                 481.32
No. Observations:                 506   AIC:                            -928.6
Df Residuals:                     489   BIC:                            -856.8
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2047      0.025      8.329      0.000       0.156       0.253
year            0.2503      0.009     29.225      0.000       0.234       0.267
holiday        -0.0721      0.025     -2.853      0.005      -0.122      -0.022
Temperature     0.4150      0.035     12.017      0.000       0.347       0.483
spring         -0.1097      0.019     -5.653      0.000      -0.148      -0.072
winter          0.1020      0.015      6.863      0.000       0.073       0.131
december       -0.0851      0.018     -4.627      0.000      -0.121      -0.049
february       -0.0374      0.023     -1.617      0.107      -0.083       0.008
january        -0.0650      0.022     -2.894      0.004      -0.109      -0.021
july           -0.0260      0.018     -1.437      0.151      -0.061       0.010
may             0.0368      0.016      2.249      0.025       0.005       0.069
november       -0.0938      0.020     -4.772      0.000      -0.132      -0.055
september       0.0594      0.017      3.555      0.000       0.027       0.092
Saturday        0.0099      0.012      0.799      0.425      -0.014       0.034
Tuesday        -0.0344      0.012     -2.906      0.004      -0.058      -0.011
cloudy         -0.0833      0.009     -9.226      0.000      -0.101      -0.066
light_rain     -0.2711      0.031     -8.679      0.000      -0.332      -0.210
==============================================================================
Omnibus:                       98.326   Durbin-Watson:                   1.942
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              270.326
Skew:                          -0.944   Prob(JB):                     1.99e-59
Kurtosis:                       6.043   Cond. No.                         15.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factor of each column in the current RFE feature set,
# rounded to two decimals and listed from highest to lowest.
X = X_train_rfe
vif = pd.DataFrame(
    {
        'Features': X.columns,
        'VIF': [variance_inflation_factor(X.values, pos) for pos in range(X.shape[1])],
    }
)
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Remove 'Saturday' from the feature set and refit OLS with an intercept.
X_train_rfe = X_train_rfe.drop(columns=['Saturday'])
X_train_lm = sm.add_constant(X_train_rfe)
m7 = sm.OLS(endog=y_train, exog=X_train_lm).fit()
print(m7.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.846
Model:                            OLS   Adj. R-squared:                  0.841
Method:                 Least Squares   F-statistic:                     179.0
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          9.53e-188
Time:                        11:11:16   Log-Likelihood:                 480.99
No. Observations:                 506   AIC:                            -930.0
Df Residuals:                     490   BIC:                            -862.4
Df Model:                          15                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2065      0.024      8.437      0.000       0.158       0.255
year            0.2503      0.009     29.237      0.000       0.234       0.267
holiday        -0.0727      0.025     -2.879      0.004      -0.122      -0.023
Temperature     0.4150      0.035     12.023      0.000       0.347       0.483
spring         -0.1100      0.019     -5.669      0.000      -0.148      -0.072
winter          0.1019      0.015      6.855      0.000       0.073       0.131
december       -0.0850      0.018     -4.624      0.000      -0.121      -0.049
february       -0.0373      0.023     -1.615      0.107      -0.083       0.008
january        -0.0657      0.022     -2.927      0.004      -0.110      -0.022
july           -0.0258      0.018     -1.430      0.153      -0.061       0.010
may             0.0365      0.016      2.230      0.026       0.004       0.069
november       -0.0936      0.020     -4.766      0.000      -0.132      -0.055
september       0.0588      0.017      3.524      0.000       0.026       0.092
Tuesday        -0.0361      0.012     -3.093      0.002      -0.059      -0.013
cloudy         -0.0830      0.009     -9.206      0.000      -0.101      -0.065
light_rain     -0.2717      0.031     -8.705      0.000      -0.333      -0.210
==============================================================================
Omnibus:                       99.001   Durbin-Watson:                   1.939
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              274.529
Skew:                          -0.947   Prob(JB):                     2.44e-60
Kurtosis:                       6.071   Cond. No.                         14.9
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Recompute the variance inflation factors for the Model 7 feature set,
# listed from largest to smallest (intercept column not included).
X = X_train_rfe
vif = pd.DataFrame({
    'Features': X.columns,
    'VIF': [variance_inflation_factor(X.values, idx) for idx in range(X.shape[1])],
})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Model 8: 'july' has the largest p-value in the Model 7 summary (0.153),
# so drop it and refit.
X_train_rfe = X_train_rfe.drop(columns=['july'])

# statsmodels OLS does not add an intercept on its own, so add the constant column.
X_train_lm = sm.add_constant(X_train_rfe)
m8 = sm.OLS(y_train, X_train_lm).fit()
print(m8.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.845
Model:                            OLS   Adj. R-squared:                  0.841
Method:                 Least Squares   F-statistic:                     191.3
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          1.87e-188
Time:                        11:11:17   Log-Likelihood:                 479.93
No. Observations:                 506   AIC:                            -929.9
Df Residuals:                     491   BIC:                            -866.5
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2140      0.024      8.945      0.000       0.167       0.261
year            0.2508      0.009     29.272      0.000       0.234       0.268
holiday        -0.0739      0.025     -2.924      0.004      -0.123      -0.024
Temperature     0.3957      0.032     12.444      0.000       0.333       0.458
spring         -0.1096      0.019     -5.645      0.000      -0.148      -0.071
winter          0.1039      0.015      7.011      0.000       0.075       0.133
december       -0.0873      0.018     -4.765      0.000      -0.123      -0.051
february       -0.0396      0.023     -1.718      0.086      -0.085       0.006
january        -0.0693      0.022     -3.106      0.002      -0.113      -0.025
may             0.0418      0.016      2.621      0.009       0.010       0.073
november       -0.0955      0.020     -4.868      0.000      -0.134      -0.057
september       0.0643      0.016      3.956      0.000       0.032       0.096
Tuesday        -0.0371      0.012     -3.190      0.002      -0.060      -0.014
cloudy         -0.0829      0.009     -9.183      0.000      -0.101      -0.065
light_rain     -0.2740      0.031     -8.782      0.000      -0.335      -0.213
==============================================================================
Omnibus:                      101.339   Durbin-Watson:                   1.935
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              282.861
Skew:                          -0.967   Prob(JB):                     3.78e-62
Kurtosis:                       6.111   Cond. No.                         14.0
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Recompute the variance inflation factors for the Model 8 feature set,
# listed from largest to smallest (intercept column not included).
X = X_train_rfe
vif = pd.DataFrame({
    'Features': X.columns,
    'VIF': [variance_inflation_factor(X.values, idx) for idx in range(X.shape[1])],
})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Model 9: 'february' has the largest p-value in the Model 8 summary (0.086),
# so drop it and refit.
X_train_rfe = X_train_rfe.drop(columns=['february'])

# statsmodels OLS does not add an intercept on its own, so add the constant column.
X_train_lm = sm.add_constant(X_train_rfe)
m9 = sm.OLS(y_train, X_train_lm).fit()
print(m9.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.844
Model:                            OLS   Adj. R-squared:                  0.840
Method:                 Least Squares   F-statistic:                     204.9
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          5.54e-189
Time:                        11:11:19   Log-Likelihood:                 478.42
No. Observations:                 506   AIC:                            -928.8
Df Residuals:                     492   BIC:                            -869.7
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.2065      0.024      8.761      0.000       0.160       0.253
year            0.2505      0.009     29.188      0.000       0.234       0.267
holiday        -0.0761      0.025     -3.009      0.003      -0.126      -0.026
Temperature     0.4060      0.031     12.980      0.000       0.345       0.468
spring         -0.1255      0.017     -7.336      0.000      -0.159      -0.092
winter          0.1023      0.015      6.904      0.000       0.073       0.131
december       -0.0772      0.017     -4.440      0.000      -0.111      -0.043
january        -0.0483      0.019     -2.582      0.010      -0.085      -0.012
may             0.0423      0.016      2.644      0.008       0.011       0.074
november       -0.0905      0.019     -4.655      0.000      -0.129      -0.052
september       0.0648      0.016      3.974      0.000       0.033       0.097
Tuesday        -0.0369      0.012     -3.162      0.002      -0.060      -0.014
cloudy         -0.0822      0.009     -9.097      0.000      -0.100      -0.064
light_rain     -0.2726      0.031     -8.723      0.000      -0.334      -0.211
==============================================================================
Omnibus:                       96.153   Durbin-Watson:                   1.934
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              259.618
Skew:                          -0.930   Prob(JB):                     4.21e-57
Kurtosis:                       5.976   Cond. No.                         13.7
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Recompute the variance inflation factors for the Model 9 feature set,
# listed from largest to smallest (intercept column not included).
X = X_train_rfe
vif = pd.DataFrame({
    'Features': X.columns,
    'VIF': [variance_inflation_factor(X.values, idx) for idx in range(X.shape[1])],
})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

# Final model: 'january' has the largest p-value left in the Model 9 summary
# (0.010); drop it and refit on the remaining 12 predictors.
X_train_rfe = X_train_rfe.drop(columns=['january'])

# statsmodels OLS does not add an intercept on its own, so add the constant column.
X_train_lm = sm.add_constant(X_train_rfe)
Final_model = sm.OLS(y_train, X_train_lm).fit()
print(Final_model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    cnt   R-squared:                       0.842
Model:                            OLS   Adj. R-squared:                  0.838
Method:                 Least Squares   F-statistic:                     219.0
Date:                Tue, 11 Mar 2025   Prob (F-statistic):          9.90e-189
Time:                        11:11:22   Log-Likelihood:                 475.01
No. Observations:                 506   AIC:                            -924.0
Df Residuals:                     493   BIC:                            -869.1
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
===============================================================================
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.1944      0.023      8.367      0.000       0.149       0.240
year            0.2499      0.009     28.966      0.000       0.233       0.267
holiday        -0.0794      0.025     -3.129      0.002      -0.129      -0.030
Temperature     0.4242      0.031     13.834      0.000       0.364       0.484
spring         -0.1367      0.017     -8.218      0.000      -0.169      -0.104
winter          0.1026      0.015      6.888      0.000       0.073       0.132
december       -0.0667      0.017     -3.925      0.000      -0.100      -0.033
may             0.0433      0.016      2.691      0.007       0.012       0.075
november       -0.0849      0.019     -4.370      0.000      -0.123      -0.047
september       0.0648      0.016      3.956      0.000       0.033       0.097
Tuesday        -0.0374      0.012     -3.191      0.002      -0.060      -0.014
cloudy         -0.0831      0.009     -9.147      0.000      -0.101      -0.065
light_rain     -0.2716      0.031     -8.642      0.000      -0.333      -0.210
==============================================================================
Omnibus:                       90.162   Durbin-Watson:                   1.928
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              229.215
Skew:                          -0.894   Prob(JB):                     1.68e-50
Kurtosis:                       5.770   Cond. No.                         13.4
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Variance inflation factors for the final feature set,
# listed from largest to smallest (intercept column not included).
X = X_train_rfe
vif = pd.DataFrame({
    'Features': X.columns,
    'VIF': [variance_inflation_factor(X.values, idx) for idx in range(X.shape[1])],
})
vif['VIF'] = vif['VIF'].round(2)
vif = vif.sort_values(by='VIF', ascending=False)
vif

data.head()

# Residual analysis on the training data: rebuild the design matrix with the
# intercept column so it matches what Final_model was fitted on, then predict.
X_train_lm = sm.add_constant(X_train_rfe)
y_train_pred = Final_model.predict(X_train_lm)

# Distribution of the training residuals (actual - predicted).
# sns.distplot is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is its documented replacement and draws the same picture.
fig = plt.figure()
sns.histplot(y_train - y_train_pred, bins=20, kde=True, stat='density')
fig.suptitle('Error terms', fontsize=20)
plt.xlabel('Errors', fontsize=18)

Text(0.5, 0, 'Errors')

# Continuous columns that get rescaled on the test set.
num_vars = ['Temperature', 'humidity', 'windspeed', 'cnt']

# transform (not fit_transform): reuse the scaler's already-learned parameters
# so nothing is re-fitted on the test data.
data_test[num_vars] = scaler.transform(data_test[num_vars])

data_test.describe()

# Separate the target from the predictors. pop() removes 'cnt' from data_test
# in place, so X_test is the same object as data_test minus the target.
y_test = data_test.pop('cnt')
X_test = data_test

X_train_rfe.head()

# Use exactly the predictors the final model was trained on, in the same
# order, instead of a hand-copied list that can drift out of sync.
col = list(X_train_rfe.columns)
X_test_new = X_test[col]

# has_constant='add' forces the intercept column even if some test feature
# happens to be constant; the default ('skip') would silently omit it and
# leave the matrix one column short for predict().
X_test_new = sm.add_constant(X_test_new, has_constant='add')

y_test_pred = Final_model.predict(X_test_new)

# Scatter of actual test targets against the model's predictions.
fig = plt.figure()
plt.scatter(x=y_test, y=y_test_pred)
plt.xlabel('y_test', fontsize=18)
fig.suptitle('y_test vs y_test_pred', fontsize=20)
plt.ylabel('y_pred', fontsize=16)

Text(0, 0.5, 'y_pred')

# Regression plot of predictions against actual test targets:
# blue points, red fitted line, 68% confidence band.
point_style = {"color": "blue"}
line_style = {"color": "red"}

plt.figure()
sns.regplot(
    x=y_test,
    y=y_test_pred,
    ci=68,
    fit_reg=True,
    scatter_kws=point_style,
    line_kws=line_style,
)
plt.title('y_test vs y_test_pred', fontsize=15)
plt.xlabel('y_test', fontsize=15)
plt.ylabel('y_pred', fontsize=15)
plt.show()

from sklearn.metrics import mean_squared_error,r2_score

# Test-set error: mean squared error first, then its square root (RMSE).
# Both are on the scaled 'cnt' values, since y_test was rescaled above.
mse_test = mean_squared_error(y_true=y_test, y_pred=y_test_pred)
rmse_test = np.sqrt(mse_test)
print(rmse_test)

0.09449604122254024

# Coefficient of determination on the training and the test split,
# to compare in-sample fit against out-of-sample fit.
rsquared_train = r2_score(y_true=y_train, y_pred=y_train_pred)
rsquared_test = r2_score(y_true=y_test, y_pred=y_test_pred)

# Print both R-squared values
print('R-squared for train data:', rsquared_train)
print('R-squared for test data:', rsquared_test)

R-squared for train data: 0.8420110687421533
R-squared for test data: 0.8066246590011492

# Fitted coefficients of the final model (intercept included), to 4 decimals
Final_model.params.round(4)

const          0.1944
year           0.2499
holiday       -0.0794
Temperature    0.4242
spring        -0.1367
winter         0.1026
december      -0.0667
may            0.0433
november      -0.0849
september      0.0648
Tuesday       -0.0374
cloudy        -0.0831
light_rain    -0.2716
dtype: float64

	instant	dteday	season	mnth	weekday	workingday	weathersit	temp	atemp	hum	windspeed	casual	registered	cnt
0	1	01-01-2018	1	1	1	1	2	14.110847	18.18125	80.5833	10.749882	331	654	985
1	2	02-01-2018	1	1	2	1	2	14.902598	17.68695	69.6087	16.652113	131	670	801
2	3	03-01-2018	1	1	3	1	1	8.050924	9.47025	43.7273	16.636703	120	1229	1349
3	4	04-01-2018	1	1	4	1	1	8.200000	10.60610	59.0435	10.739832	108	1454	1562
4	5	05-01-2018	1	1	5	1	1	9.305237	11.46350	43.6957	12.522300	82	1518	1600

	instant	season	yr	mnth	holiday	weekday	workingday	weathersit	temp	atemp	hum	windspeed	casual	registered	cnt
count	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000	730.000000
mean	365.500000	2.498630	0.500000	6.526027	0.028767	2.995890	0.690411	1.394521	20.319259	23.726322	62.765175	12.763620	849.249315	3658.757534	4508.006849
std	210.877136	1.110184	0.500343	3.450215	0.167266	2.000339	0.462641	0.544807	7.506729	8.150308	14.237589	5.195841	686.479875	1559.758728	1936.011647
min	1.000000	1.000000	0.000000	1.000000	0.000000	0.000000	0.000000	1.000000	2.424346	3.953480	0.000000	1.500244	2.000000	20.000000	22.000000
25%	183.250000	2.000000	0.000000	4.000000	0.000000	1.000000	0.000000	1.000000	13.811885	16.889713	52.000000	9.041650	316.250000	2502.250000	3169.750000
50%	365.500000	3.000000	0.500000	7.000000	0.000000	3.000000	1.000000	1.000000	20.465826	24.368225	62.625000	12.125325	717.000000	3664.500000	4548.500000
75%	547.750000	3.000000	1.000000	10.000000	0.000000	5.000000	1.000000	2.000000	26.880615	30.445775	72.989575	15.625589	1096.500000	4783.250000	5966.000000
max	730.000000	4.000000	1.000000	12.000000	1.000000	6.000000	1.000000	3.000000	35.328347	42.044800	97.250000	34.000021	3410.000000	6946.000000	8714.000000

	instant	dteday	season	month	weekday	workingday	weathersit	Temperature	atemp	humidity	windspeed	casual	registered	cnt
0	1	01-01-2018	1	1	1	1	2	14.110847	18.18125	80.5833	10.749882	331	654	985
1	2	02-01-2018	1	1	2	1	2	14.902598	17.68695	69.6087	16.652113	131	670	801
2	3	03-01-2018	1	1	3	1	1	8.050924	9.47025	43.7273	16.636703	120	1229	1349
3	4	04-01-2018	1	1	4	1	1	8.200000	10.60610	59.0435	10.739832	108	1454	1562
4	5	05-01-2018	1	1	5	1	1	9.305237	11.46350	43.6957	12.522300	82	1518	1600

	season	month	weekday	workingday	weathersit	Temperature	atemp	humidity	windspeed	cnt
0	1	1	1	1	2	14.110847	18.18125	80.5833	10.749882	985
1	1	1	2	1	2	14.902598	17.68695	69.6087	16.652113	801
2	1	1	3	1	1	8.050924	9.47025	43.7273	16.636703	1349
3	1	1	4	1	1	8.200000	10.60610	59.0435	10.739832	1562
4	1	1	5	1	1	9.305237	11.46350	43.6957	12.522300	1600

	Temperature	atemp	humidity	windspeed
Temperature	1.000000	0.991696	0.128565	-0.158186
atemp	0.991696	1.000000	0.141512	-0.183876
humidity	0.128565	0.141512	1.000000	-0.248506
windspeed	-0.158186	-0.183876	-0.248506	1.000000

Problem Statement¶

Business Goal¶

Table of contents¶

Reading and Understanding Data¶

Data Quality Check¶

Changing some categorical variables for further analysis of continuous and categorical data separately¶

Checking correlation between numerical variables¶

Visualising the Data¶

Visualization on numerical columns¶

Handling Outliers for numerical columns¶

Visualization on categorical columns¶

Handling Outliers for categorical variable¶

Data Preparation¶

Creating Dummy Variables¶

Splitting data into train and test¶

Rescaling variables using MinMax Scaler¶

Training The Model¶

Model 1¶

Checking Multicollinearity Using VIF¶

Recursive Feature Elimination¶

Model 2 : Using RFE¶

Model 3¶

Model 4¶

Model 5¶

Model 6¶

Model 7¶

Model 8¶

Model 9¶

Final Model¶

Residual Analysis of Train Data¶

Predictions on test Data¶

Conclusion¶

Regression Equation¶

**************¶

	season	month	weekday	workingday	weathersit	Temperature	humidity	windspeed	cnt
0	spring	january	Monday	1	cloudy	14.110847	80.5833	10.749882	985
1	spring	january	Tuesday	1	cloudy	14.902598	69.6087	16.652113	801
2	spring	january	Wednesday	1	clear	8.050924	43.7273	16.636703	1349
3	spring	january	Thursday	1	clear	8.200000	59.0435	10.739832	1562
4	spring	january	Friday	1	clear	9.305237	43.6957	12.522300	1600

	year	workingday	Temperature	humidity	windspeed	cnt	spring	summer	august	december	february	september	Monday	Saturday	Tuesday	Wednesday	cloudy
422	1	1	0.383206	0.281444	0.615256	0.469757	1	0	0	0	1	0	0	0	0	1	0
728	1	1	0.245101	0.270255	0.822447	0.164795	1	0	0	1	0	0	1	0	0	0	0
614	1	0	0.802708	0.647560	0.373836	0.853918	0	0	0	0	0	1	0	1	0	0	0
113	0	1	0.651106	0.758823	0.425255	0.453942	0	1	0	0	0	0	0	0	1	0	1
579	1	0	0.880586	0.507702	0.484409	0.814198	0	0	1	0	0	0	0	1	0	0	1

	year	holiday	workingday	Temperature	humidity	windspeed	cnt	spring	summer	winter	august	december	february	january	july	june	march	may	november	october	september	Monday	Saturday	Sunday	Thursday	Tuesday	Wednesday	cloudy	light_rain
count	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000
mean	0.507905	0.029644	0.683794	0.536409	0.490968	0.423239	0.492988	0.243083	0.245059	0.256917	0.084980	0.092885	0.069170	0.088933	0.084980	0.069170	0.086957	0.088933	0.088933	0.084980	0.081028	0.136364	0.142292	0.148221	0.138340	0.158103	0.146245	0.369565	0.019763
std	0.500432	0.169772	0.465454	0.227992	0.210552	0.190072	0.238332	0.429369	0.430548	0.437366	0.279128	0.290559	0.253994	0.284928	0.279128	0.253994	0.282050	0.284928	0.284928	0.279128	0.273148	0.343514	0.349696	0.355671	0.345598	0.365198	0.353701	0.483165	0.139322
min	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.000000	0.000000	0.000000	0.333364	0.323713	0.281934	0.322226	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	1.000000	0.000000	1.000000	0.532212	0.490297	0.408108	0.496499	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
75%	1.000000	0.000000	1.000000	0.736253	0.646471	0.532182	0.677472	0.000000	0.000000	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000
max	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000

	Features	VIF
2	workingday	55.75
3	Temperature	42.31
4	humidity	15.48
6	spring	13.28
22	Sunday	12.05
21	Saturday	11.84
8	winter	11.57
7	summer	10.05
5	windspeed	7.24
17	november	6.79
18	october	6.40
13	july	6.35
9	august	5.98
10	december	5.93
12	january	5.88
19	september	4.95
11	february	4.57
15	march	3.30
26	cloudy	2.99
1	holiday	2.95
14	june	2.79
16	may	2.45
24	Tuesday	2.26
0	year	2.18
25	Wednesday	2.17
20	Monday	2.08
23	Thursday	2.07
27	light_rain	1.42

	features	Support	Rank
0	year	True	1
24	Tuesday	True	1
22	Sunday	True	1
21	Saturday	True	1
19	september	True	1
17	november	True	1
16	may	True	1
26	cloudy	True	1
12	january	True	1
11	february	True	1
13	july	True	1
4	humidity	True	1
1	holiday	True	1
2	workingday	True	1
3	Temperature	True	1
10	december	True	1
5	windspeed	True	1
27	light_rain	True	1
8	winter	True	1
6	spring	True	1
7	summer	False	2
23	Thursday	False	3
20	Monday	False	4
18	october	False	5
15	march	False	6
14	june	False	7
25	Wednesday	False	8
9	august	False	9

	Features	VIF
2	workingday	27.61
3	Temperature	22.29
4	humidity	14.72
5	windspeed	6.96
16	Sunday	6.47
15	Saturday	6.27
6	spring	5.01
7	winter	3.24
18	cloudy	2.93
10	january	2.49
0	year	2.13
9	february	2.06
1	holiday	1.94
13	november	1.93
8	december	1.77
11	july	1.58
19	light_rain	1.38
12	may	1.36
17	Tuesday	1.31
14	september	1.30

	Features	VIF
3	humidity	13.42
2	Temperature	12.43
4	windspeed	5.33
5	spring	4.45
17	cloudy	2.92
6	winter	2.90
9	january	2.35
0	year	2.12
8	february	2.00
12	november	1.86
7	december	1.71
10	july	1.56
18	light_rain	1.37
11	may	1.36
16	Tuesday	1.30
13	september	1.29
14	Saturday	1.26
15	Sunday	1.26
1	holiday	1.06

	Features	VIF
2	Temperature	6.37
3	windspeed	5.23
4	spring	4.22
5	winter	2.70
8	january	2.32
0	year	2.08
7	february	2.00
11	november	1.83
6	december	1.65
16	cloudy	1.62
9	july	1.52
10	may	1.32
15	Tuesday	1.29
12	september	1.28
13	Saturday	1.26
14	Sunday	1.26
17	light_rain	1.09
1	holiday	1.06

	Features	VIF
2	Temperature	4.01
3	spring	3.83
4	winter	2.69
7	january	2.31
0	year	2.07
6	february	1.99
10	november	1.79
5	december	1.64
15	cloudy	1.62
8	july	1.48
9	may	1.31
14	Tuesday	1.28
11	september	1.27
12	Saturday	1.26
13	Sunday	1.26
1	holiday	1.06
16	light_rain	1.06

	Features	VIF
2	Temperature	3.82
3	spring	3.82
4	winter	2.69
7	january	2.31
0	year	2.07
6	february	1.99
10	november	1.79
5	december	1.64
14	cloudy	1.61
8	july	1.48
9	may	1.31
11	september	1.27
13	Tuesday	1.23
12	Saturday	1.21
1	holiday	1.06
15	light_rain	1.06

	Features	VIF
3	spring	3.81
2	Temperature	3.66
4	winter	2.68
7	january	2.31
0	year	2.07
6	february	1.99
10	november	1.79
5	december	1.63
13	cloudy	1.61
8	july	1.48
9	may	1.31
11	september	1.26
12	Tuesday	1.19
1	holiday	1.06
14	light_rain	1.06

	Features	VIF
3	spring	3.73
2	Temperature	2.76
4	winter	2.58
7	january	2.30
0	year	2.06
6	february	1.99
9	november	1.79
5	december	1.63
12	cloudy	1.60
8	may	1.22
10	september	1.20
11	Tuesday	1.19
1	holiday	1.06
13	light_rain	1.06

	Features	VIF
2	Temperature	2.75
4	winter	2.52
0	year	2.06
3	spring	2.00
8	november	1.77
6	january	1.66
11	cloudy	1.60
5	december	1.49
7	may	1.22
9	september	1.20
10	Tuesday	1.19
12	light_rain	1.06
1	holiday	1.05

	Features	VIF
2	Temperature	2.72
4	winter	2.49
0	year	2.06
7	november	1.76
10	cloudy	1.59
5	december	1.44
3	spring	1.36
6	may	1.22
8	september	1.20
9	Tuesday	1.19
11	light_rain	1.06
1	holiday	1.05

	year	holiday	workingday	Temperature	humidity	windspeed	cnt	spring	summer	winter	august	december	february	january	july	june	march	may	november	october	september	Monday	Saturday	Sunday	Thursday	Tuesday	Wednesday	cloudy	light_rain
count	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.000000	218.00000	218.000000	218.000000	218.000000	218.000000	218.000000
mean	0.486239	0.027523	0.711009	0.560655	0.474024	0.408393	0.498156	0.247706	0.275229	0.215596	0.082569	0.068807	0.091743	0.077982	0.087156	0.114679	0.073394	0.077982	0.068807	0.082569	0.082569	0.155963	0.137615	0.12844	0.155963	0.110092	0.137615	0.261468	0.036697
std	0.500961	0.163978	0.454337	0.230041	0.199583	0.193546	0.215383	0.432674	0.447658	0.412182	0.275863	0.253709	0.289327	0.268760	0.282713	0.319367	0.261383	0.268760	0.253709	0.275863	0.275863	0.363656	0.345288	0.33535	0.363656	0.313724	0.345288	0.440446	0.188450
min	0.000000	0.000000	0.000000	0.046591	-0.071617	0.049875	0.009055	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.000000	0.000000	0.000000	0.366546	0.336799	0.277259	0.353042	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	0.000000	0.000000	1.000000	0.576343	0.476623	0.373049	0.501751	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.00000	0.000000	0.000000	0.000000	0.000000	0.000000
75%	1.000000	0.000000	1.000000	0.771557	0.616670	0.507796	0.637239	0.000000	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.00000	0.000000	0.000000	0.000000	1.000000	0.000000
max	1.000000	1.000000	1.000000	0.984424	0.985082	1.049896	0.933961	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.00000	1.000000	1.000000	1.000000	1.000000	1.000000

Boom Bike Sharing¶

Problem Statement¶

Business Goal¶

Table of contents¶

Reading and Understanding Data¶

Data Quality Check¶

Changing some categorical variables for further analysis of continuous and categorical data separately¶

Checking correlation between numerical variables¶

Visualising the Data¶

Visualization on numerical columns¶

Handling Outliers for numerical columns¶

Visualization on categorical columns¶

Handling Outliers for categorical variable¶

Data Preparation¶

Creating Dummy Variables¶

Splitting data into train and test¶

Rescaling variables using MinMax Scaler¶

Training The Model¶

Model 1¶

Checking Multicollinearity Using VIF¶

Recursive Feature Elimination¶

Model 2 : Using RFE¶

Model 3¶

Model 4¶

Model 5¶

Model 6¶

Model 7¶

Model 8¶

Model 9¶

Final Model¶

Residual Analysis of Train Data¶

Predictions on test Data¶

Conclusion¶

Regression Equation¶

**************¶